

clear all

* Set your working directory (cd) here. Every folder that is not shipped in
* the data archive is created below, relative to this directory.
cd "T:\Projects\Josh PGE CI\CI project\replication"

* Create the output folders. -capture- makes the script re-runnable: a bare
* -mkdir- stops with an error when the folder already exists. Forward slashes
* are used because Stata accepts them on every platform.
capture mkdir "ado"
capture mkdir "ado/plus"
capture mkdir "stata data"
capture mkdir "stata data/regsave"
capture mkdir "stata data/hourly data save"
capture mkdir "stata data/generate list of tou data"
capture mkdir "temp"
capture mkdir "latex"

* Point the PERSONAL and PLUS ado directories at the project-local folders.
* The paths are anchored on the current working directory so they keep
* resolving even if a later do-file changes directory.
local project_root "`c(pwd)'"
sysdir set PERSONAL "`project_root'/ado"
sysdir set PLUS     "`project_root'/ado/plus"

* Install the user-written packages this project relies on from SSC.
* -replace- keeps a re-run from stopping when an older copy of a package is
* already present. moremata is spelled in lowercase, as it is named on SSC.
* NOTE(review): confirm every name below is still available on SSC; a package
* that cannot be found stops the script at this point.
local ssc_packages reghdfe gtools ftools unique moremata regsave estout ///
	pvvar geonear ivreg2 ivreghdfe ranktest payper

foreach pkg of local ssc_packages {
	ssc install `pkg', replace
}




* Step 1: build some basic datasets that are used later on.
do "stata do/step_1_support_datasets.do"

* Step 2: prepare the raw data provided by PG&E and start cleaning it up.
* This is run on peak hours (14-17) for people 27 weeks before and after the
* cutoff date. The main regressions use only these event hours, so they are
* cleaned first.
do "stata do/step_2_prepare_raw_consumption_data.do"

* Step 3: merge on weather data. The data come from mesowest, but any hourly
* weather data with individual stations will work fine.
do "stata do/step_3_merge_weather_data_to_consumption_data.do"

* Step 4: collapse down situations where one establishment might have 2
* meters. It is a somewhat complicated situation, but it is a quirk of how
* PG&E's meters work.
do "stata do/step_4_multiple_meter_cleaning.do"


* Step 5: rerun steps 2-4 on a slightly different version of the data: all
* hours of the day, but only for people within 8 weeks of the cutoff. It was
* computationally not possible to do all hours AND 27 weeks from the cutoff.
* This step also pulls in some of the things calculated in steps 2-4 to make
* the process easier. One product of this step is a "total" variable, which
* gives the total consumption in the summer of 2014 and is used in step 6 for
* cleaning.
do "stata do/step_5_run_for_all_hours_between_step.do"

* Step 6: finish cleaning the regression dataset from steps 2-4. It pulls the
* "total" variable from step 5 as part of the cleaning.
do "stata do/step_6_finish_main_regression_data_clean.do"


* Step 7: run the regressions. This primarily uses the hours 14-17 data from
* step 6. One regression uses the "HUGE" dataset, which uses hours 0-23, to
* create figure 5. This step creates a lot of regsave documents which are used
* to make the tables and figures.
do "stata do/step_7_run_regressions.do"


* Step 8: run a bunch of calculations that are used in the tables.
* Keep this comment free of the three-slash continuation marker: it would join
* the next line onto the comment, and the do command would then never run.
do "stata do/step_8_tables_prepare.do"



* ------------------------------------------------------------
* Build Tables
* ------------------------------------------------------------
* start here
*
* The table assembly do-files are run in numeric order:
*   Table 1  - summary stat table pulling from many sources
*   Table 2  - first stage
*   Table 3  - overall regressions
*   Table 4  - non event hour regressions
*   Table 5  - establishment temperature regressions
*   Table 6  - forecast temperature regressions
*   Table 7  - results by customer facing vs non-customer facing
*   Table 8  - welfare impacts components
*   Table 9  - welfare impacts for 2015 event days
*   Table 10 - welfare impacts of peak pricing under alternate scenarios
*   Table 11 - welfare impacts of peak pricing compared to first-best,
*              real time price
forvalues table_no = 1/11 {
	do "stata do/table_`table_no'_assemble.do"
}

* ------------------------------------------------------------
* Build Figures
* ------------------------------------------------------------
*
* The figure do-files are run in numeric order:
*   Figure 1
*   Figure 2
*   Figure 3 - pre period treatment and control residuals comparison
*   Figure 4 - hourly treatment effects on event days
* NOTE(review): the step 7 description mentions a figure 5, but no figure 5
* do-file is run here - confirm whether it is produced elsewhere.
forvalues figure_no = 1/4 {
	do "stata do/figure_`figure_no'.do"
}








* Developer search helper left over from authoring; it is not a pipeline step.
* It is disabled because it points at a folder outside the replication
* directory and calls -rcd- and -find-, which do not appear in the install
* list above, so it would stop a clean replication run with an error.
* rcd "T:\Home\Josh\CI project - code only\stata do": find *.do , match("stata data/struct winner by sa") show
